This notebook presents Keypoint Detection using a CNN on the CAT dataset.
Resources
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import PIL
import PIL.ImageDraw
Limit TensorFlow GPU memory usage
# TensorFlow 1.x GPU setup: allow_growth makes TF allocate GPU memory on demand
# instead of grabbing it all at startup; opening a throwaway Session applies
# the config for the lifetime of the process.
import tensorflow as tf
gpu_options = tf.GPUOptions(allow_growth=True) # init TF ...
config=tf.ConfigProto(gpu_options=gpu_options) # w/o taking ...
with tf.Session(config=config): pass # all GPU memory
Point this to dataset directory, folder should contain CAT_00, CAT_01 and so on.
dataset_location = '/home/marcin/Datasets/cat-dataset/cats/'
def plot_images(indices, images, features):
    """Predict keypoints for the selected samples and show them drawn on the images.

    Relies on the notebook-global `model`; images must be square (N, S, S, C).
    """
    def _with_keypoints(img, keypoints, r=2, c='red'):
        """Draw keypoints on PIL image"""
        canvas = PIL.ImageDraw.Draw(img)
        for x, y in keypoints:
            canvas.ellipse([x-r, y-r, x+r, y+r], c)
        return img

    _, iw, ih, _ = images.shape
    assert iw == ih
    half = iw // 2
    # Model outputs are roughly in -1..1; map them back to pixel coordinates.
    predictions = model.predict(features[indices])
    kps = predictions * half + half
    _, axes = plt.subplots(nrows=1, ncols=len(indices), figsize=[12,4])
    if len(indices) == 1:
        axes = [axes]
    for ax, idx, kp in zip(axes, indices, kps):
        ax.imshow(_with_keypoints(PIL.Image.fromarray(images[idx]), kp))
        ax.axis('off')
    plt.show()
In this section we list the dataset images, load their keypoint annotations, and preprocess both to a fixed 224x224 size.
Subfolders within dataset
folders_all = ['CAT_00', 'CAT_01', 'CAT_02', 'CAT_03', 'CAT_04', 'CAT_05', 'CAT_06']
Get paths to all images
def build_image_files_list(folders):
    """Return sorted .jpg paths from the given subfolders of `dataset_location`,
    concatenated in folder order."""
    files = []
    for folder in folders:
        pattern = os.path.join(dataset_location, folder, '*.jpg')
        files += sorted(glob.glob(pattern))
    return files
# Collect every image path across all subsets.
image_paths_all = build_image_files_list(folders_all)
print('Nb images:', len(image_paths_all))
image_paths_all[:3]  # notebook cell echo: peek at the first few paths
Helper to load keypoint data from .cat files
def load_keypoints(path):
    """Parse a .cat annotation file.

    .cat is a single-line text file in format: 'nb_keypoints x1, y1, x2, y2, ...'
    Returns an int np.ndarray of shape (9, 2): [[x1, y1], [x2, y2], ...].
    """
    with open(path, 'r') as f:
        tokens = f.read().split()   # [nb_keypoints, x1, y1, x2, y2, ...]
    count = int(tokens[0])
    coords = np.array(tokens[1:], dtype=int).reshape((-1, 2))
    assert count == len(coords)
    assert count == 9   # always nine keypoints: eyes, nose, two ears
    return coords
Helper to draw keypoints on the image
def draw_keypoints(img, keypoints, r=2, c='red'):
    """Mark each (x, y) keypoint on a PIL image with a filled circle of radius r."""
    pen = PIL.ImageDraw.Draw(img)
    for kp in keypoints:
        x, y = kp
        pen.ellipse([x - r, y - r, x + r, y + r], c)
    return img
Open single image and load corresponding keypoints
# Load one example image and its keypoint annotation (image path + '.cat').
example_path = image_paths_all[0]
img = PIL.Image.open(example_path)
kps = load_keypoints(example_path+'.cat')
Show example keypoints
display(kps)
Show example image
display(draw_keypoints(img.copy(), kps))
Helper to scale image and keypoints
def scale_img_kps(image, keypoints, target_size):
    """Resize a PIL image to (target_size, target_size) and rescale keypoints to match.

    Args:
        image: PIL image of arbitrary size.
        keypoints: np.ndarray (N, 2) of (x, y) pixel coordinates.
        target_size: output side length in pixels.

    Returns:
        (resized_image, rescaled_keypoints).

    Note: the output array inherits the dtype of `keypoints`; for the int
    arrays produced by load_keypoints this truncates sub-pixel precision
    toward zero (matches how the data was originally preprocessed).
    """
    width, height = image.size
    ratio_w = width / target_size
    ratio_h = height / target_size
    image_new = image.resize((target_size, target_size), resample=PIL.Image.LANCZOS)
    keypoints_new = np.zeros_like(keypoints)
    # Column slices instead of the original fancy indexing with range(len(...)).
    keypoints_new[:, 0] = keypoints[:, 0] / ratio_w
    keypoints_new[:, 1] = keypoints[:, 1] / ratio_h
    return image_new, keypoints_new
Test it
# Sanity check: scale the example image to 224x224 and redraw the keypoints.
img2, kps2 = scale_img_kps(img, kps, target_size=224)
display(draw_keypoints(img2.copy(), kps2))
Helper to load and transform both input image and keypoints
def load_image_keypoints(image_path, keypoints_path, target_size):
    """Load an image and its .cat keypoints; return both the originals
    and the versions scaled to (target_size, target_size)."""
    original_image = PIL.Image.open(image_path)
    original_kps = load_keypoints(keypoints_path)
    scaled_image, scaled_kps = scale_img_kps(original_image, original_kps, target_size)
    return original_image, original_kps, scaled_image, scaled_kps
Show couple more examples
# Show another example: the original image next to its scaled version.
idx = 21
image, keypoints, image_new, keypoints_new = load_image_keypoints(
    image_paths_all[idx], image_paths_all[idx]+'.cat', target_size=224)
display(draw_keypoints(image.copy(), keypoints))
display(draw_keypoints(image_new.copy(), keypoints_new))
Preprocess Images
# Resize every image to 224x224 and scale its keypoints to match.
images_list = []
keypoints_list = []
for i, image_path in enumerate(image_paths_all):
    _, _, image_new, keypoints_new = \
        load_image_keypoints(image_path, image_path+'.cat', target_size=224)
    image_arr = np.array(image_new)
    # assert image_arr.shape == (224, 224, 3)
    # assert 0 <= image_arr.min() <= 255
    images_list.append(image_arr)
    keypoints_list.append(keypoints_new)
    if i % 1000 == 0:
        print('i:', i)  # progress marker every 1000 images
# Stack into single arrays: images (N, 224, 224, 3), keypoints (N, 9, 2).
images = np.array(images_list)
keypoints = np.array(keypoints_list)
print('images.shape:', images.shape)
print('images.dtype:', images.dtype)
print('images.min()', images.min())
print('images.max()', images.max())
Note that some keypoints are outside of image (e.g. when cat ear is cropped out)
# Keypoints can fall outside 0..224 when e.g. a cat ear was cropped out.
print('keypoints.shape:', keypoints.shape)
print('keypoints.dtype:', keypoints.dtype)
print('keypoints.min()', keypoints.min())
print('keypoints.max()', keypoints.max())
Sanity check
# Visual sanity check: keypoints should line up with the cat's face.
idx = 1
display(draw_keypoints(PIL.Image.fromarray(images[idx]).copy(), keypoints[idx]))
Save Data
dataset_npz = os.path.join(dataset_location, 'cats_224.npz')
print(dataset_npz)
# np.savez(dataset_npz, images=images, keypoints=keypoints)  # uncomment to (re)write the archive
In this section we load the saved dataset and convert it into model-ready features and targets.
Dataset file
# Path to the archive produced by the preprocessing section above.
dataset_npz = os.path.join(dataset_location, 'cats_224.npz')
print(dataset_npz)
Load data
# Reload the preprocessed images and keypoints from disk.
npzfile = np.load(dataset_npz)
images = npzfile['images']
keypoints = npzfile['keypoints']
Preprocess
Convert input into ImageNet format. This converts to float, scales and offsets to match distribution of ImageNet training data.
# Convert images into MobileNetV2's expected input format (float, scaled/offset
# to the distribution the ImageNet weights were trained on).
features = tf.keras.applications.mobilenet_v2.preprocess_input(images)
print('features.shape:', features.shape)
print('features.dtype:', features.dtype)
print('features.min()', features.min())
print('features.max()', features.max())
Convert targets to range -1..1
# Map pixel keypoint coords to roughly -1..1 (112 = half of the 224px image).
targets = (keypoints - 112) / 112
print('targets.shape:', targets.shape)
print('targets.dtype:', targets.dtype)
print('targets.min()', targets.min())
print('targets.max()', targets.max())
Save Data
# Save the processed features/targets, then reload everything from disk
# (raw images are kept for visualization; features/targets feed the model).
processed_npz = os.path.join(dataset_location, 'processed_224.npz')
print(processed_npz)
np.savez(processed_npz, features=features, targets=targets)
dataset_npz = os.path.join(dataset_location, 'cats_224.npz')
processed_npz = os.path.join(dataset_location, 'processed_224.npz')
print(dataset_npz)
print(processed_npz)
npzfile = np.load(dataset_npz)
images = npzfile['images']
npzfile = np.load(processed_npz)
features = npzfile['features']
targets = npzfile['targets']
Split into training and validation
# Hold out everything after the first 8000 samples for validation.
split = 8000
train_images = images[:split]
train_features = features[:split]
train_targets = targets[:split]
valid_images = images[split:]
valid_features = features[split:]
valid_targets = targets[split:]
Define model
# MobileNetV2 backbone (ImageNet weights, max-pooled features) with a small
# dense regression head; trained with MSE against targets in roughly -1..1.
X_inputs = tf.keras.layers.Input(shape=(224, 224, 3))
mobilenetv2 = tf.keras.applications.mobilenet_v2.MobileNetV2(
    input_shape=(224, 224, 3), alpha=1.0, include_top=False,
    weights='imagenet', input_tensor=X_inputs, pooling='max')
X = tf.keras.layers.Dense(128, activation='relu')(mobilenetv2.layers[-1].output)
X = tf.keras.layers.Dense(64, activation='relu')(X)
X = tf.keras.layers.Dense(18, activation='linear')(X)  # 9 keypoints * 2 coords
X = tf.keras.layers.Reshape((9, 2))(X)
model = tf.keras.models.Model(inputs=X_inputs, outputs=X)
model.compile(optimizer=tf.keras.optimizers.Adam(), loss='mse')
Custom callback for plotting
class CallbackPlot(tf.keras.callbacks.Callback):
    """Keras callback that plots predicted keypoints after every epoch.

    Uses the notebook-global train/valid arrays and plot_images().
    """

    def on_train_begin(self, logs=None):
        pass

    # Keras passes the epoch index as the first argument here; the original
    # parameter was misleadingly named 'batch'. Mutable default {} replaced
    # with the conventional None (logs is unused anyway).
    def on_epoch_end(self, epoch, logs=None):
        plot_images([10, 20, 30, 40, 50, 60], train_images, train_features)
        plot_images([10, 20, 30, 40, 50, 60], valid_images, valid_features)
Show some cats before training. Most probably there won't be any keypoints shown
# Before training: head weights are random, so predicted keypoints will be
# wrong or off-image.
plot_images([10, 20, 30, 40, 50, 60], train_images, train_features)
plot_images([10, 20, 30, 40, 50, 60], valid_images, valid_features)
Run training
#
# Callbacks
#
# tb_logdir = os.path.expanduser('~/logs/')
# tb_counter = len([log for log in os.listdir(tb_logdir) if 'cats' in log]) + 1
# callback_tb = tf.keras.callbacks.TensorBoard(
#     log_dir=tb_logdir + 'cats' + '_' + str(tb_counter), )

# Keep the best model (lowest val_loss) seen so far.
callback_mc = tf.keras.callbacks.ModelCheckpoint(
    'model.h5', save_best_only=True, verbose=1)
# Shrink the learning rate 5x after 5 epochs without val_loss improvement.
callback_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=5, verbose=1)
# callback_plt = CallbackPlot()
#
# Train
#
hist = model.fit(train_features, train_targets, epochs=50, batch_size=32, shuffle=True,
                 validation_data=(valid_features, valid_targets),
                 callbacks=[
                     #callback_tb,
                     callback_mc,
                     callback_lr,
                     #callback_plt,
                 ]
                 )
Plot loss during training
# Loss curves: full range (ax1), zoomed-in (ax2), learning-rate schedule (ax3).
_, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(12,4))
ax1.plot(hist.history['loss'], label='loss')
ax1.plot(hist.history['val_loss'], label='val_loss')
ax1.legend()
ax2.plot(hist.history['loss'], label='loss')
ax2.plot(hist.history['val_loss'], label='val_loss')
ax2.legend()
ax2.set_ylim(0, .1)  # zoom in to see late-training improvements
ax3.plot(hist.history['lr'], label='lr')  # 'lr' is logged by ReduceLROnPlateau
ax3.legend()
plt.tight_layout()
plt.show()
Show some cats from validation set - looks pretty good
# Show validation predictions in groups of five.
# list(range(...)) replaces the redundant identity comprehension [i for i in range(...)].
for j in range(0, 50, 5):
    plot_images(list(range(j, j + 5)), valid_images, valid_features)
Investigate more closely
# Inspect a single validation example in detail.
idx = 73
plot_images([idx], valid_images, valid_features)